/*********************************************************************/
/* Creates the analyses and data for charts and regressions in paper */
/*********************************************************************/

clear
set more off
clear matrix
clear mata
set maxvar 30000

graph set window fontface "Times New Roman"

/*--------------------------------------------------*/

//	Section 2: Data

/*--------------------------------------------------*/


/*****************************/
/* 			Figure 1 	     */
/*****************************/

clear
use "$savedata\wave1-7base.dta"

* distribution of responses to first question

gen response_cat = .
replace response_cat = 1 if inrange(exlo80,0,9)
replace response_cat = 2 if inrange(exlo80,10,19)
replace response_cat = 3 if inrange(exlo80,20,29)
replace response_cat = 4 if inrange(exlo80,30,39)
replace response_cat = 5 if inrange(exlo80,40,49)
replace response_cat = 6 if inrange(exlo80,50,59)
replace response_cat = 7 if inrange(exlo80,60,69)
replace response_cat = 8 if inrange(exlo80,70,79)
replace response_cat = 9 if inrange(exlo80,80,89)
replace response_cat = 10 if inrange(exlo80,90,100)

* make graph *
preserve
keep if exlo80 >= 0
count
gen young = age <65
collapse (count) idauniq [pw=wgt], by(response_cat young)
bys young : egen tot = total(idauniq)
replace idauniq = idauniq/tot
drop tot
reshape wide idauniq , i(response_cat) j(young)
graph bar idauniq0 idauniq1, over(response_cat, relabel(1 "0-9" 2 "10-19" 3 "20-29" 4 "30-39" 5 "40-49" 6 "50-59" 7 "60-69" 8 "70-79" 9 "80-89" 10 "90-100")) legend(order(1 "Below 65" 2 "65 and above") ring(0) position(12)) graphregion(color(white)) yscale(range(0 0.25)) ylabel(0 "0%" .05 "5%" .1 "10%" .15 "15%" 0.2 "20%" 0.25 "25%", angle(horizontal) grid glpattern(dash) glcolor(black) glwidth(vthin) nogmin) ytitle("Percentage of responses") b1title("Stated percentage chance of survival")aspect(0.5) ysize(3) bar(1, color(black)) bar(2, color(gs10)) graphregion(margin(2 2 2 2))
graph export "$output\Fig1.eps", replace
graph export "$output\Fig1.pdf", replace
restore

/*********************************************************************/
/* 			Analysis of don't knows and 'impossible' answers 	     */
/*********************************************************************/

clear
use "$savedata\wave1-7base.dta"

preserve
* get number who are asked a question *
drop if exlo80 == -9 | exlo80 == -1
count
* get number of unique individuals asked a question *
bys idauniq : egen first_wave = min(wave)
count if wave == first_wave
* get percentage who say don't know *
gen dk = exlo80 == -8
collapse (mean) dk
export excel "$output\Section2.xls", firstrow(variables) sheetreplace sheet("Don't know")
restore

* analyse 'impossible' responses amongst those who give a numeric answer *
keep if inrange(exlo80,0,100)

gen impossible0 = inrange(exlo80,0,99) & exlo90 != 100 & (exlo80>exlo90 | (exlo90 <0 | exlo90 == .))
gen impossible1 = exlo80 == 0
gen impossible2 = exlo80 == 100
gen impossible3 = exlo90 >= exlo80 & inrange(exlo80,0,100) & inrange(exlo90,0,100) 
gen impossible4 = (exlo90 >= exlo80 & inrange(exlo80,0,100) & inrange(exlo90,0,100)) | (exlo80 == 100 | exlo90 == 100)

preserve
collapse (mean) impossible*
export excel "$output\Section2.xls", firstrow(variables) sheetreplace sheet("Impossible answers - all")
restore

/*--------------------------------------------------*/

//	Section 3: Accuracy of expectations

/*--------------------------------------------------*/

/****************************************************************/
/* Fig 2:  Compare scaled and ONS survival curves 				*/
/****************************************************************/

clear
use "$temp\ons_survival.dta"

keep if dobyear == 1950 & age == 60 
rename prob* ons_prob*
merge 1:1 dobyear age sex using "$temp\prob_survive_scaled.dta"
assert _m != 1
keep if _m == 3
drop _m 

drop age
reshape long ons_prob prob , i(sex dobyear) j(age)
drop if age < 60

keep sex ons_prob prob age

* men *
preserve 
keep if sex == 1
twoway (line prob age, lcolor(red)) (line ons_prob age, lcolor(black) lpattern(dash)),  graphregion(color(white)) xlabel(60[5]110) yscale(range(0 1) outergap(0)) ylabel(0 "0%" 0.1 "10%" 0.2 "20%" 0.3 "30%" 0.4 "40%" 0.5 "50%" 0.6 "60%" 0.7 "70%" 0.8 "80%" 0.9 "90%" 1 "100%", angle(horizontal) grid glpattern(dash) glcolor(black) glwidth(vvthin) nogmin) ytitle("Probability of surviving to at least that age") xtitle("Age") legend(off) aspect(0.8) ysize(4) xsize(5)  plotregion(margin(zero)) graphregion(margin(0 3 0 0))
graph export "$output\Fig2a.eps", replace
restore

* women *
preserve 
keep if sex == 2
twoway (line prob age, lcolor(red)) (line ons_prob age, lcolor(black) lpattern(dash)),  graphregion(color(white))  xlabel(60[5]110) yscale(range(0 1) outergap(0)) ylabel(0 "0%" 0.1 "10%" 0.2 "20%" 0.3 "30%" 0.4 "40%" 0.5 "50%" 0.6 "60%" 0.7 "70%" 0.8 "80%" 0.9 "90%" 1 "100%", angle(horizontal) grid glpattern(dash) glcolor(black) glwidth(vvthin) nogmin) ytitle("Probability of surviving to at least that age") xtitle("Age") aspect(0.8) ysize(4) xsize(5)  plotregion(margin(zero)) graphregion(margin(0 3 0 0)) legend(order(1 "Scaled survival curve" 2 "ONS survival curve") cols(1) ring(0) position(8) region(lstyle(none)))
graph export "$output\Fig2b.eps", replace
graph export "$output\Fig2b.pdf", replace
restore

/****************************************************************/
/* Fig 3:  How do expectation compare to scaled life tables?	*/
/****************************************************************/

clear 
use "$savedata\wave1-7base.dta" 
keep idauniq age sex cohort5 exlo80 exlo90 finstat wave dobyear
keep if inrange(age,50,110)
keep if inrange(dobyear,1920,1959)

* merge in scaled life tables *
merge m:1 age sex dobyear using "$temp\prob_survive_scaled.dta"
assert _m != 1
drop if _m == 2
drop _m

gen cohort10 = .
replace cohort10 = 1 if inrange(cohort5,4,5)
replace cohort10 = 2 if inrange(cohort5,6,7)
replace cohort10 = 3 if inrange(cohort5,8,9)
replace cohort10 = 4 if inrange(cohort5,10,11)

forv a = 50(1)110 {
replace prob`a' = prob`a'*100
}

save "$temp\raw_data_charts.dta", replace

forv c = 1(1)3 { 

clear 
use "$temp\raw_data_charts.dta" 

preserve
keep if cohort10 ==`c'
drop if exlo80<0 | (exlo80 <= exlo90 & exlo90 != .)
drop if wave<6 & finstat~=1 & finstat~=7 & finstat~=14
collapse (mean) exlo80 prob75 prob80 prob85 prob90 prob95 prob100 (count) idauniq, by(age sex)
drop if idauniq <10 
drop idauniq 
tempfile exlo80
save `exlo80'
restore
keep if cohort10 ==`c'
drop if exlo90<0 | (exlo80 <= exlo90 & exlo90 != .)
drop if wave<6 & finstat~=1 & finstat~=7 & finstat~=14
collapse (mean) exlo90 (count) idauniq, by(age sex)
drop if idauniq <10 
drop idauniq 
merge 1:1 age sex using `exlo80'
drop _m

save "$temp\chart3_data_c`c'.dta", replace 

}

*******************************
***
***	Figure 3
***
*******************************

* Make chart * 
clear
use "$temp\chart3_data_c2.dta"
replace exlo90 = exlo80 if inrange(age,70,74)

* men *
preserve 
keep if sex == 1
twoway (scatter exlo80 age if inrange(age,62,65), lpattern(shortdash)  mcolor(gray) connect(l) lcolor(gray)) (scatter prob75 age if inrange(age,62,65),  mcolor(gray) connect(l) lcolor(gray))  (scatter exlo80 age if inrange(age,66,69), lpattern(shortdash) mcolor(red) connect(l) lcolor(red) msymbol(diamond)) (scatter prob80 age if inrange(age,66,69),  mcolor(red) connect(l) lcolor(red) msymbol(diamond)) (scatter exlo90 age if inrange(age,66,74), lpattern(shortdash)  mcolor(green) connect(l) lcolor(green) msymbol(plus)) (scatter prob85 age if inrange(age,66,74),  mcolor(green) connect(l) lcolor(green) msymbol(plus)) (scatter exlo80 age if inrange(age,75,79), lpattern(shortdash)  mcolor(black) connect(l) lcolor(black)  msymbol(triangle)) (scatter prob90 age if inrange(age,75,79),  mcolor(black) connect(l) lcolor(black)  msymbol(triangle)) (scatter exlo80 age if inrange(age,80,84),  mcolor(cyan) connect(l) lpattern(shortdash) lcolor(cyan)  msymbol(square)) (scatter prob95 age if inrange(age,80,84),  mcolor(cyan) connect(l) lcolor(cyan)  msymbol(square)), graphregion(color(white)) yscale(range(0 100) outergap(0)) ylabel(0[10]100, angle(horizontal) grid glpattern(shortdash) glcolor(black) glwidth(vvthin) nogmin) ytitle("Prob. survive to at least that age") xtitle("Age of respondent") legend(off) aspect(1) ysize(4) xsize(4)  plotregion(margin(zero)) graphregion(margin(2 2 2 2))
graph export "$output\Fig3a.eps", replace
graph export "$output\Fig3a.pdf", replace
restore

* women *
preserve 
keep if sex == 2
twoway (scatter exlo80 age if inrange(age,62,65), lpattern(shortdash)  mcolor(gray) connect(l) lcolor(gray)) (scatter prob75 age if inrange(age,62,65),  mcolor(gray) connect(l) lcolor(gray))  (scatter exlo80 age if inrange(age,66,69), lpattern(shortdash) mcolor(red) connect(l) lcolor(red) msymbol(diamond)) (scatter prob80 age if inrange(age,66,69),  mcolor(red) connect(l) lcolor(red) msymbol(diamond)) (scatter exlo90 age if inrange(age,66,74), lpattern(shortdash)  mcolor(green) connect(l) lcolor(green) msymbol(plus)) (scatter prob85 age if inrange(age,66,74),  mcolor(green) connect(l) lcolor(green) msymbol(plus)) (scatter exlo80 age if inrange(age,75,79), lpattern(shortdash)  mcolor(black) connect(l) lcolor(black)  msymbol(triangle)) (scatter prob90 age if inrange(age,75,79),  mcolor(black) connect(l) lcolor(black)  msymbol(triangle)) (scatter exlo80 age if inrange(age,80,84),  mcolor(cyan) connect(l) lpattern(shortdash) lcolor(cyan)  msymbol(square)) (scatter prob95 age if inrange(age,80,84),  mcolor(cyan) connect(l) lcolor(cyan)  msymbol(square)), graphregion(color(white)) yscale(range(0 100) outergap(0)) ylabel(0[10]100, angle(horizontal) grid glpattern(dash) glcolor(black) glwidth(vvthin)) ytitle("Prob. survive to at least that age") xtitle("Age of respondent") legend(order(1 "75 subjective" 2 "75 objective" 3 "80 subjective" 4 "80 objective" 5 "85 subjective" 6 "85 objective" 7 "90 subjective" 8 "90 objective" 9 "95 subjective" 10 "95 objective") cols(1) position(3) region(lstyle(none))) aspect(1) ysize(4) xsize(5.7) plotregion(margin(zero)) graphregion(margin(2 2 2 2))
graph export "$output\Fig3b.eps", replace 
graph export "$output\Fig3b.pdf", replace 
restore

/***********************************************************/
/* Producing subjective surivial curves for group medians  */
/***********************************************************/

clear
use "$temp\raw_data_charts.dta"

keep if inlist(finstat,1,7,14,25,33) //this selects all core sample members
drop if exlo80 < 0 | exlo80 == . // drop if don't answer 
drop if exlo90 < 0 | exlo90 == . // drop if don't answer  
drop if exlo80 <= exlo90 

drop if age == 70 // 3 obs that are aged 70

* export number in each wave that answer 2 Qs *
preserve
collapse (count) idauniq, by(age wave)
reshape wide idauniq , i(age) j(wave)
rename idauniq* obs_w*
export excel "$output\number_obs.xlsx",  firstrow(variables) sheetreplace sheet("Number that have 2 answers")
restore

* export number in each wave for which can make a Weibull curve (can't answer zero prob of survival) *
preserve
drop if exlo80 == 0 | exlo90 == 0 // cannot calculate a Weibull if say that certain to have died at any point
collapse (count) idauniq, by(age wave)
reshape wide idauniq , i(age) j(wave)
rename idauniq* obs_w*
export excel "$output\number_obs.xlsx",  firstrow(variables) sheetreplace sheet("Number that have 2 answers>0")
restore

* Estimate k and lambda based on two points only - for each individual *
gen     sample = (inrange(exlo80,0,100)) & (inrange(exlo90,0,100))  
replace sample = 0 if exlo90>=exlo80
replace exlo80 = exlo80/100 if sample==1
replace exlo90 = exlo90/100 if sample==1
gen     t1 = 75-age if sample==1 & age<66
replace t1 = 80-age if sample==1 & age>65
gen     t2 = 85-age if sample==1
gen k = ln(ln(exlo80)/ln(exlo90))/(ln(t1)-ln(t2))
gen lambda = (1/t1)*((-ln(exlo80))^(1/k))
gen invlambda = 1/lambda
replace invlambda=. if invlambda>500

* select just wave 3 so that each cohort also corresponds to just one age *
keep if wave == 3

gen exlo80NoMiss = exlo80
replace exlo80NoMiss = . if sample~=1 

gen exlo90NoMiss = exlo90
replace exlo90NoMiss = . if sample~=1 

* Generate group level median responses *
gen t3 = 110 - age if sample==1
egen groupExlo80 = median(exlo80NoMiss), by(cohort10 sex)
egen groupExlo90 = median(exlo90NoMiss), by(cohort10 sex)
egen groupt1 = median(t1), by(cohort10 sex)
egen groupt2 = median(t2), by(cohort10 sex)
egen groupt3 = median(t3), by(cohort10 sex)

* Estimate k and lambda based on two points only - for each group *
gen groupk = ln(ln(groupExlo80)/ln(groupExlo90))/(ln(groupt1)-ln(groupt2))
gen grouplambda = (1/groupt1)*((-ln(groupExlo80))^(1/groupk))
gen invgrouplambda = 1/grouplambda
replace invgrouplambda=. if invgrouplambda>500

* Merge in cohort mortality data for given sex and cohort
merge m:1 dobyear sex using "$temp\cohort_mortality.dta"
assert _merge != 1
keep if _merge == 3
drop _merge

* For each person keep mortality data for current age
gen survive_age = .
forv age = 50(1)70 {
replace survive_age = survive`age' if age == `age'
}

* calculate probability of dying before age 110 given current age, cohort and sex
gen exlo3 = survive110/survive_age
* recode exlo3 to 0 if exlo2 is 0 *
replace exlo3 = 0 if (exlo90 == 0 | exlo80 == 0)

* then take median as already done with first and second reports *
egen groupExlo110 = median(exlo3), by(cohort10 sex)
replace exlo3 = groupExlo110

rename groupExlo80 exlo1
rename groupExlo90 exlo2

* Take median age within groups *
egen med_age = median(age), by(cohort10 sex)

* sort data *
bys sex cohort10: gen nSexCohortG10 = _n
keep if nSexCohortG10 == 1
keep exlo? groupt? sample idauniq groupk grouplambda invgrouplambda sex cohort10 med_age
reshape long groupt exlo, i(idauniq) j(obs)
gen minuslexlo = -ln(exlo)

* do non-linear least squares estimation of parameters based on 3 points *
gen groupOverIDlambda = .
gen groupOverIDk = .
forvalues sex = 1/2 {
	forvalues cohort10 = 2/4 {
	
	sum groupk if sex == `sex' & cohort10 == `cohort10'
	local initk = r(mean)
	sum grouplambda if sex == `sex' & cohort10 == `cohort10'
	local initlambda = r(mean)
	di "nl ( minuslexlo =  ( {lambda}*groupt)^{k}) if sex == `sex' & cohort10 == `cohort10', initial(lambda `initlambda' k `initk')"
	nl ( exlo =  exp(- ( {lambda}*groupt)^{k})) if sex == `sex' & cohort10 == `cohort10', initial(lambda `initlambda' k `initk')
	di `sex'
	di `cohort10'
	mat def coeffs = e(b)
	svmat coeffs
	replace groupOverIDlambda = coeffs1[1] if sex == `sex' & cohort10 == `cohort10'
	replace groupOverIDk = coeffs2[1] if sex == `sex' & cohort10 == `cohort10'
	drop coeffs?
	}
}

save "$temp\median_subj_curves.dta", replace

*****************************
***
***		Figure 4
***
*****************************

clear 
use "$temp\prob_survive_scaled.dta"
keep if dobyear == 1945 & age == 60
drop age
reshape long prob , i(sex dobyear) j(age)
keep sex age dobyear prob 
drop if age < 60
save "$temp\obj_curve_F4.dta", replace

clear 
use "$temp\median_subj_curves.dta"

* make graphs *

* make into correct format *
keep if cohort10 == 3 
expand 17
sort sex groupt 
bys sex : gen t = _n - 1 
replace exlo = . if t != groupt 
gen age = med_age + t 
drop groupt med_age 

* make subjective curve *
gen subj_prob =  exp(-(groupOverIDlambda*t)^groupOverIDk)

* merge in objective curve *
gen dobyear = 1945 // take median birth year 
merge 1:1 sex dobyear age using  "$temp\obj_curve_F4.dta"
assert _m == 3
drop _m 

* plot *
* men *
preserve 
keep if sex == 1
twoway (line prob age, lcolor(red)) (line subj_prob age, lcolor(green) lpattern(dash)) (scatter exlo age, mcolor(green)),  graphregion(color(white)) xlabel(60[5]110) yscale(range(0 1) outergap(0)) ylabel(0 "0%" 0.1 "10%" 0.2 "20%" 0.3 "30%" 0.4 "40%" 0.5 "50%" 0.6 "60%" 0.7 "70%" 0.8 "80%" 0.9 "90%" 1 "100%", angle(horizontal) grid glpattern(dash) glcolor(black) glwidth(vvthin) nogmin) ytitle("Probability of surviving to at least that age") xtitle("Age") legend(off) aspect(0.8) ysize(4) xsize(5)  plotregion(margin(zero)) graphregion(margin(0 3 0 0))
graph export "$output\Fig4a.eps", replace
graph export "$output\Fig4a.pdf", replace
restore

* women *
preserve 
keep if sex == 2
twoway (line prob age, lcolor(red)) (line subj_prob age, lcolor(green) lpattern(dash))  (scatter exlo age, mcolor(green)),  graphregion(color(white))  xlabel(60[5]110) yscale(range(0 1) outergap(0)) ylabel(0 "0%" 0.1 "10%" 0.2 "20%" 0.3 "30%" 0.4 "40%" 0.5 "50%" 0.6 "60%" 0.7 "70%" 0.8 "80%" 0.9 "90%" 1 "100%", angle(horizontal) grid glpattern(dash) glcolor(black) glwidth(vvthin) nogmin) ytitle("Probability of surviving to at least that age") xtitle("Age") aspect(0.8) ysize(4) xsize(5)  plotregion(margin(zero)) graphregion(margin(0 3 0 0)) legend(order(1 "Objective curve" 2 "Subjective curve" 3 "Actual reports") cols(1) ring(0) position(8) region(lstyle(none)))
graph export "$output\Fig4b.eps", replace
graph export "$output\Fig4b.pdf", replace
restore